#!/usr/bin/env python3

#==============================================================================
# Functionality
#==============================================================================
import pdb
import sys
import os
import re
import scrap_common as scr
import traceback

# utility funcs, classes, etc go here.

def asserting(cond):
    """Fail loudly when *cond* is falsy, entering the debugger if possible.

    The debugger is only started when standard input is an interactive
    terminal.  When stdin is piped, ``pdb`` would read the piped data as
    debugger commands, so it is skipped and the failure is raised directly.

    Args:
        cond: Any value; only its truthiness is examined.

    Raises:
        AssertionError: If *cond* is falsy.  Raised explicitly (rather than
            through an ``assert`` statement) so the check survives ``-O``.
    """
    if cond:
        return
    stdin = sys.stdin
    if stdin is not None and stdin.isatty():
        pdb.set_trace()
    raise AssertionError('assertion failed: %r' % (cond,))

def has_stdin():
    """Report whether standard input is something other than a terminal."""
    attached_to_terminal = sys.stdin.isatty()
    return not attached_to_terminal

def pad(x):
    """Return *x* with a single space guaranteed at both ends.

    A space is prepended when *x* does not already start with one; the
    trailing check is then made on that (possibly prefixed) string, so an
    empty input becomes exactly one space.
    """
    asserting(is_string(x))
    prefixed = x if x.startswith(' ') else ' ' + x
    return prefixed if prefixed.endswith(' ') else prefixed + ' '

def isreg(rx):
    """Return True when *rx* is a compiled regular-expression object."""
    compiled_type = type(re.compile(''))
    return isinstance(rx, compiled_type)

def reg(pat, flags=0):
    """Compile *pat* in verbose mode unless it is already compiled.

    Args:
        pat: A pattern string, or an already compiled pattern (returned as is).
        flags: Extra ``re`` flags OR-ed together with ``re.VERBOSE``.
    """
    if not isreg(pat):
        pat = re.compile(pat, re.VERBOSE | flags)
    return pat

def respec(spec):
    """Turn a group spec into a template usable with ``Match.expand``.

    An integer ``n`` becomes the backreference ``\\n``; a string that already
    starts with a backslash is passed through unchanged.  Anything else is
    rejected via ``asserting``.
    """
    if isinstance(spec, int):
        return r'\%d' % spec
    if spec.startswith('\\'):
        return spec
    asserting(False)

def regroups(rx, m, groups):
    """Extract the requested group(s) from match object *m*.

    Args:
        rx: The pattern that produced *m* (not used here).
        m: A match object, or a falsy value when nothing matched.
        groups: One group spec, or a list of them (see ``respec``).

    Returns:
        ``None`` when *m* is falsy; a list of expanded strings when *groups*
        is a list; otherwise a single expanded string.
    """
    if not m:
        return None
    if not isinstance(groups, list):
        return m.expand(respec(groups))
    return [m.expand(respec(name)) for name in groups]

def parampat(rx):
    """Wrap a pattern string so it must be surrounded by whitespace.

    Already compiled patterns are returned untouched; strings are embedded
    between two whitespace runs and compiled through ``reg``.
    """
    if not isreg(rx):
        rx = reg(r'(?: \s+ ) %s (?: \s+)' % rx)
    return rx

def reget(rx, groups, data, attr):
    """Apply pattern method *attr* to padded *data* and pull out *groups*.

    Args:
        rx: Pattern string or compiled pattern (normalised by ``parampat``).
        groups: Group spec or list of group specs handed to ``regroups``.
        data: The text to examine; it is space-padded with ``pad`` first.
        attr: Name of the pattern method to call, e.g. ``'match'``.
    """
    pattern = parampat(rx)
    padded = pad(data)
    found = getattr(pattern, attr)(padded)
    return regroups(pattern, found, groups)

def rematch(rx, groups, data):
    """Run ``reget`` using the pattern's ``match`` method."""
    return reget(rx, groups, data, 'match')

def research(rx, groups, data):
    """Run ``reget`` using the pattern's ``search`` method."""
    return reget(rx, groups, data, 'search')

# def okgroups(groups):
#     for group in groups:
#         if group and len(group) > 0:
#             return group

#==============================================================================
# Cmdline
#==============================================================================
import argparse

parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter, 
    description="""
TODO
""")
     
parser.add_argument('-v', '--verbose',
    action="store_true",
    help="verbose output" )
     
parser.add_argument('-d', '--delim',
    default=r',\s',
    help="define the standard column separators.  Defaults to [,] and [\s] (comma or whitespace)" )
     
parser.add_argument('-s', '--sep',
    action="append",
    help="define additional column separators." )
     
parser.add_argument('-m', '--measure',
    action="store_true",
    help="measure the size of each column" )
     
parser.add_argument('-j', '--join',
    default=None,
    type=str,
    help="join each column by the specified string" )

args = None

#==============================================================================
# Main
#==============================================================================

# Regex fragment: one capture group matching digits with an optional leading '-'.
INT = r'([-]?\d+)'

def getspec(spec):
    """Parse a column spec string into ``(start, mid, end, flags)``.

    Two one-letter modifiers may appear anywhere in *spec*; the first
    occurrence of each is removed before the numeric part is parsed:

    * ``,`` sets ``flags['unquote']``
    * ``r`` sets ``flags['rhs']``

    The remaining text must be one of ``A:B``, ``A:``, ``:B`` or ``A`` where
    A and B are (optionally negative) integers.

    Returns:
        A 4-tuple ``(start, mid, end, flags)``.  ``mid`` is ``':'`` for the
        slice forms and ``None`` for a single index; missing bounds are
        ``None``.

    Raises:
        AssertionError: Via ``asserting`` when *spec* matches none of the forms.
    """
    # Plain str.replace removes the first occurrence just like the regex
    # substitution did, without passing ``count`` positionally to re.subn
    # (deprecated since Python 3.13).
    unquot = ',' in spec
    if unquot:
        spec = spec.replace(',', '', 1)
    rhs = 'r' in spec
    if rhs:
        spec = spec.replace('r', '', 1)
    flags = {'rhs': rhs, 'unquote': unquot}

    both = rematch(r'%s [:] %s' % (INT, INT), [1, 2], spec)
    if both:
        return int(both[0]), ':', int(both[1]), flags

    start = rematch(r'%s [:]' % INT, 1, spec)
    if start:
        return int(start), ':', None, flags

    end = rematch(r'[:] %s' % INT, 1, spec)
    if end:
        return None, ':', int(end), flags

    start = rematch(r'%s' % INT, 1, spec)
    if start:
        return int(start), None, None, flags

    asserting(False)

def get(cols, spec):
    """Select the column(s) named by *spec* from *cols*.

    Args:
        cols: The list of column strings for one line.
        spec: A column spec string understood by ``getspec``.

    Returns:
        The two-element list produced by ``process_col``; its first element
        is additionally unquoted when the spec carried the unquote flag.
    """
    start, mid, end, flags = getspec(spec)
    if args.verbose:
        print([start, mid, end, flags])
    if mid == ':':
        val = cols[start:end]
    else:
        asserting((start is not None) and (end is None))
        # An out-of-range single index selects nothing instead of raising.
        in_range = -len(cols) <= start < len(cols)
        val = cols[start] if in_range else []
    val = process_col(val, flags)
    if flags['unquote']:
        val[0] = unquote(val[0])
    return val


def list2str(args):
    """Join the items of *args* with ``', '``; empty or None gives ``''``."""
    if args and len(args) > 0:
        return ', '.join(str(item) for item in args)
    return ''





def process(line):
    """Split one input line into columns and pick the requested ones.

    The line is right-stripped and normalised through ``unquote(quote(...))``,
    then tokenised with a regex whose delimiter character class is built from
    ``args.sep`` and ``args.delim``.  Double-quoted runs are kept together.

    Args:
        line: One line of input text.

    Returns:
        A list with one ``get`` result per spec in ``args.args``; when no
        specs were given, one result per column found.
    """
    line = line.rstrip()
    line = unquote(quote(line))

    #
    # see http://stackoverflow.com/questions/16710076/python-split-a-string-respect-and-preserve-quotes
    #
    # >>> s = r'a=foo, b=bar, c="foo, bar", d=false, e="false", f="foo\", bar"'
    # >>> re.findall(r'(?:[^\s,"]|"(?:\\.|[^"])*")+', s)
    # ['a=foo', 'b=bar', 'c="foo, bar"', 'd=false', 'e="false"', 'f="foo\\", bar"']
    #
    if args.verbose:
        print('line:', repr(line))
    # Both operands must be strings: falling back to a list for an empty or
    # missing --delim would raise TypeError on the concatenation.
    delims = list2str(args.sep or []) + (args.delim or '')
    if args.verbose:
        print('delims:', repr(delims))
    pat = r'(?: [^%s"] | "(?: \\. | [^"] )*" )+' % delims
    if args.verbose:
        print('pat:', repr(pat))
    cols = reg(pat).findall(line)
    if len(args.args) >= 1:
        col = [get(cols, x) for x in args.args]
    else:
        # if no args were specified, then get everything.
        col = [get(cols, str(i)) for i in range(len(cols))]
    if args.verbose:
        print('col:', col)
    return col

def is_string(x):
    """Return True when *x* is a ``str`` instance (``bytes`` do not count)."""
    text_types = (str,)
    return isinstance(x, text_types)

# R1: a backslash followed by any single character other than a newline
# (the character is captured in group 1).
R1 = re.compile(r'\\(.)')
# R2: two consecutive backslashes.
R2 = re.compile(r'\\\\')

def unq_bs(x):
    """Remove one level of backslash escaping from *x*.

    Scanning left to right, every pair of backslashes collapses to a single
    backslash and every remaining lone backslash is dropped.

    A single regex pass is used instead of a placeholder string, so input
    that happens to contain the placeholder text is no longer corrupted.

    Args:
        x: The string to process.

    Returns:
        The string with backslashes reduced as described.
    """
    # Group 1 is the optional second backslash: a pair yields '\', a lone
    # backslash yields the empty string.
    return re.sub(r'\\(\\?)', r'\1', x)

# https://stackoverflow.com/questions/14820429/how-do-i-decodestring-escape-in-python3
def string_escape(x, encoding='utf-8'):
    """Decode backslash escape sequences contained in *x*.

    Args:
        x: Text possibly containing escapes such as ``\\t`` or ``\\x41``.
        encoding: Encoding used for the final decode step.

    Returns:
        The decoded text, or *x* itself (unmodified) when any of the
        encode/decode steps raises a Unicode error.
    """
    # handle DeprecationWarning: invalid escape sequence '\ '
    text = x.replace(r'\ ', ' ')
    try:
        raw = text.encode('latin1')              # to bytes, required by 'unicode-escape'
        unescaped = raw.decode('unicode-escape')  # perform the actual escape decoding
        raw = unescaped.encode('latin1')          # 1:1 mapping back to bytes
        return raw.decode(encoding)               # decode original encoding
    except (UnicodeEncodeError, UnicodeDecodeError):
        return x

def unquote(x):
    """Strip surrounding double quotes and backslash escaping from *x*.

    Non-string values (which must not be ``bytes``) and strings shorter than
    two characters are returned unchanged.  Otherwise:

    1. one pair of enclosing double quotes is removed, if present;
    2. escape sequences are decoded with ``string_escape``, with any run of
       trailing backslashes set aside first and re-attached afterwards;
    3. the ``R1`` substitution replaces each remaining backslash-plus-character
       pair by the character alone.
    """
    if not is_string(x):
        assert not isinstance(x, bytes)
        return x
    if len(x) < 2:
        return x
    if x[0] == '"' and x[-1] == '"':
        x = x[1:-1]
    try:
        # Trailing backslashes are kept out of the escape decoding.
        body = x.rstrip('\\')
        trailing = x[len(body):]
        x = string_escape(body) + trailing
    except ValueError:
        traceback.print_exc()
        pdb.set_trace()
        asserting(False)

    return R1.sub(r'\1', x)  # unquote

import json

def quote(x):
    """Wrap *x* in double quotes when it contains unescaped whitespace.

    Non-strings, and strings with no whitespace character that lacks a
    preceding backslash, are returned unchanged.  Otherwise the value is
    unquoted, escape-decoded, has its double quotes backslash-escaped and is
    enclosed in double quotes.
    """
    if not is_string(x) or not reg(r' (?<! [\\] ) \s').search(x):
        return x
    inner = string_escape(unquote(x))
    inner = inner.replace('"', r'\"')
    return '"%s"' % inner

# Flags dict paired with the 'NIL' placeholder that process_col returns for an empty column.
NIL = {'rhs':False, 'unquote':False}

def process_col(col, flags):
    """Combine the selected column value(s) into one output cell.

    Args:
        col: A single column or a sequence of columns; it is passed through
            ``scr.listify`` first.
        flags: The flags dict to attach to the resulting cell.

    Returns:
        ``['NIL', NIL]`` when nothing was selected; otherwise a two-element
        list ``[text, flags]`` where *text* is the unquoted parts joined by
        ``args.join`` (a single space by default) and re-quoted if needed.
    """
    items = scr.listify(col)
    if len(items) < 1:
        return ['NIL', NIL]
    asserting(len(items) >= 1)
    parts = [unquote(item) for item in list(items)]
    glue = string_escape(args.join or ' ')
    return [quote(unquote(glue.join(parts))), flags]

def padstr(s, w, rhs):
    """Pad *s* to width *w* using ``%``-formatting.

    Args:
        s: The value to format.
        w: The field width, formatted into the spec with ``%d``.
        rhs: When true the text is right-justified, otherwise left-justified.
    """
    alignment = '' if rhs else '-'
    spec = '%' + alignment + ('%d' % w) + 's'
    return spec % s

# When true, stdinlines() also writes every line it reads to testfile(); run() resets it to False.
dumpstdin = False

def decode_line(line):
    """Turn a ``bytes`` line into text; anything else is returned as is.

    UTF-8 is tried first, with latin1 as the fallback when the bytes are not
    valid UTF-8.
    """
    if not isinstance(line, bytes):
        return line
    try:
        return line.decode('utf-8')
    except UnicodeDecodeError:
        return line.decode('latin1')

def stdinlines():
    """Yield decoded lines read from the binary standard-input stream.

    When the module-level ``dumpstdin`` flag is set, every decoded line is
    also written to the file named by ``testfile()`` before being yielded.
    Otherwise lines are fetched one ``readline()`` call at a time until an
    empty result signals end of input.
    """
    if dumpstdin:
        with open(testfile(), "w") as f:
            for raw in sys.stdin.buffer:
                text = decode_line(raw)
                f.write(text)
                yield text
        return

    raw = sys.stdin.buffer.readline()
    while raw:
        yield decode_line(raw)
        raw = sys.stdin.buffer.readline()

def flush(gather):
    """Write the gathered rows to stdout as aligned columns, then empty *gather*.

    Args:
        gather: A list of rows; each row is a list of ``[text, flags]`` cells.

    Each column is padded to the widest text found in that column across all
    gathered rows.  Cells are separated by ``args.join`` (two spaces by
    default), trailing whitespace is stripped from each line, and stdout is
    flushed after every line.
    """
    # First pass: widest text per column position.
    widths = []
    for row in gather:
        missing = len(row) - len(widths)
        if missing > 0:
            widths.extend([0] * missing)
        for idx, cell in enumerate(row):
            widths[idx] = max(widths[idx], len(cell[0]))

    # Second pass: emit every row using those widths.
    for row in gather:
        pieces = []
        for idx, cell in enumerate(row):
            text, flags = cell[0], cell[1]
            padded = padstr(text, widths[idx], rhs=flags['rhs'])
            if idx > 0:
                pieces.append(string_escape(args.join or '  '))
            pieces.append(padded)
        sys.stdout.write(''.join(pieces).rstrip() + '\n')
        sys.stdout.flush()

    gather.clear()
    
def run():
    """Process every input line and print the selected columns.

    Input comes from stdin when it is not a terminal; otherwise the lines of
    the file named by ``testfile()`` are used.  Without ``--measure`` each
    row is flushed as soon as it is processed; with it, rows accumulate and
    are flushed together at the end so column widths cover the whole input.
    """
    # Self-check of the regex helpers before any real work.
    asserting(rematch(r'(\d+)', 1, " 123 ") == "123")
    if args.verbose:
        print(args)

    global dumpstdin
    dumpstdin = False
    gather = []

    def read(line):
        gather.append(process(line))
        if not args.measure:
            flush(gather)

    if has_stdin():
        for line in stdinlines():
            read(line)
    else:
        with open(testfile(), 'r') as f:
            for line in f:
                read(line)

    # final.
    flush(gather)

def testfile():
    """Return the user-expanded path of ``~/bin/test.txt``."""
    location = '~/bin/test.txt'
    return os.path.expanduser(location)

def main():
    """Parse the command line (once) and run the tool.

    Unrecognised command-line arguments are stored on the namespace as
    ``args.args``.  An ``IOError`` raised while running is handled by closing
    stdout and stderr, ignoring any further ``IOError`` from the close calls.

    Returns:
        Whatever ``run()`` returns, or ``None`` after an ``IOError``.
    """
    global args
    try:
        if not args:
            parsed, leftovers = parser.parse_known_args()
            parsed.args = leftovers
            args = parsed
        return run()
    except IOError:
        # http://stackoverflow.com/questions/15793886/how-to-avoid-a-broken-pipe-error-when-printing-a-large-amount-of-formatted-data
        for stream in (sys.stdout, sys.stderr):
            try:
                stream.close()
            except IOError:
                pass


# Run the tool only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

